The purpose of this workflow is to identify aging signatures by comparing gene expression at 12 months and 4 months in WT/WT mice.
We can then look for accelerated or delayed versions of this in the various Kl variants.
rm(list = ls())
library(here)
results.dir <- here("Results", "Aging")
if(!file.exists(results.dir)){dir.create(results.dir)}
general.data.dir <- here("Data", "general")
geno.cols <- c("WT/WT" = "darkgray", "WT/VS" = "#a6bddb", "VS/VS" = "#2b8cbe", "WT/FC" = "#a1d99b", "FC/FC" = "#31a354")
all.fun <- list.files(here("Code"), full.names = TRUE)
for(i in 1:length(all.fun)){source(all.fun[i])}
group.processed.data.dir <- here("Results", "all", "processed_data")
general.processed.data.dir <- here("Results", "Processed_Data")
needed.libraries <- c("pheatmap", "DT", "gprofiler2")
load_libraries(needed.libraries)
#gene information tables
mouse.gene.table <- read.table(file.path(general.data.dir, "mouse_gene_info.txt"), sep = "\t", header = TRUE)
scaled.expr <- readRDS(file.path(group.processed.data.dir, paste0("Scaled_Expression.RDS")))
covar.table <- read.csv(file.path(group.processed.data.dir, "mouse_info.csv"))
We want to identify an aging signature in the WT mice. We can then look in the different Kl genotypes to see if there is an acceleration or delay of aging based on Kl genotype.
We fit a linear model to explain gene expression using age. The following plot shows the
#pull out wt mice
wt.idx <- which(covar.table[,"genotype"] == "WT/WT")
u_age <- sort(unique(covar.table[,"age_batch"]))
wt_age <- lapply(u_age, function(x) intersect(wt.idx, which(covar.table[,"age_batch"] == x)))
group_id <- lapply(wt_age, function(x) if(length(x) > 0){rownames(covar.table)[x]})
names(group_id) <- u_age
#adjust expression by non-age covariates (sex, batch)
dummy.covar <- dummy_covar(covar.table[,c("sequencingBatch", "sex_ge")])
adj_expr <- adjust(t(scaled.expr), dummy.covar)
wt.id <- unlist(group_id)
#fit a model in the WT mice to gene expression with age as the explanatory variable
age.test <- apply(adj_expr[wt.id,], 2, function(x) lm(x~covar.table[wt.id,"age_batch"]))
age.p <- sapply(age.test, function(x) coef(summary(x))[2,"Pr(>|t|)"])
age.effect <- sapply(age.test, function(x) coef(summary(x))[2,"Estimate"])
The following plots show the p value distribution and the volcano plot for age-related changes in gene expression in the WT mice.
fdr.thresh = 0.1
Red dots indicate genes that are significantly differentially expressed with age at an FDR of 0.1.
adj.p <- p.adjust(age.p, "fdr")
sig.idx <- which(adj.p <= fdr.thresh)
sig.col <- rep("black", length(adj.p))
sig.col[sig.idx] <- "red"
par(mfrow = c(1,2))
qqunif.plot(age.p)
plot(age.effect, -log10(age.p), col = sig.col, pch = 16)
A table of these genes is shown below.
gene.id <- colnames(adj_expr)[sig.idx]
gene.idx <- match(gene.id, mouse.gene.table[,"ensembl_gene_id"])
diff.gene <- cbind(mouse.gene.table[gene.idx,], signif(age.effect[gene.id],2),
signif(age.p[gene.id], 2), signif(adj.p[gene.id], 2))
colnames(diff.gene)[c(7:9)] <- c("effect", "p", "p_adjusted")
datatable(diff.gene)
The following tables show functional enrichments for up-, down-, and any differentially expressed gene.
up_idx <- which(diff.gene[,"effect"] > 0)
down_idx <- which(diff.gene[,"effect"] < 0)
up_enrich <- gost(diff.gene[up_idx, "ensembl_gene_id"], organism = "mmusculus",
sources = c("GO", "KEGG", "REACTOME", "HP", "CORUM"))
down_enrich <- gost(diff.gene[down_idx, "ensembl_gene_id"], organism = "mmusculus",
sources = c("GO", "KEGG", "REACTOME", "HP", "CORUM"))
all_enrich <- gost(diff.gene[, "ensembl_gene_id"], organism = "mmusculus",
sources = c("GO", "KEGG", "REACTOME", "HP", "CORUM"))
plot.enrichment(up_enrich, plot.label = "Up-regulated with age")
plot.enrichment(down_enrich, plot.label = "Down-regulated with age")
plot.enrichment(all_enrich, plot.label = "Different with age")
The following heat map shows gene expression in WT/WT animals for the genes that were differentially expressed by age.
diff.expr <- adj_expr[wt.id,gene.id]
age.df <- data.frame("age" = as.factor(covar.table[wt.id,"age_batch"]))
rownames(age.df) <- wt.id
colnames(diff.expr) <- diff.gene[,"external_gene_name"]
gene.order <- order(pam(t(diff.expr), k = 2)$clustering)
ind.order <- order(pam(diff.expr, k = 2)$clustering)
pheatmap(t(diff.expr[ind.order,gene.order]), show_colnames = FALSE,
annotation_col = age.df, cluster_rows = FALSE, cluster_cols = FALSE)
The following plot shows that these genes more or less separate the WT mice by age.
wt.diff.decomp <- plot.decomp(diff.expr,
cols = as.numeric(as.factor(covar.table[wt.id,"age_batch"])),
main = "WT Animals separated by age", xlim = c(-0.2, 0.45), label.cex = 0.7)
The following plot shows how the differentially expressed genes are loaded. Genes are colored by whether they are up-regulated with aging (red) or downregulated with age (blue).
wt.diff.decomp <- plot.decomp(t(diff.expr), label.points = TRUE,
cols = colors.from.values(diff.gene[,"effect"],
use.pheatmap.colors = TRUE), main = "Age-Related Genes", xlim = c(-0.2, 0.45), label.cex = 0.7)
We then looked at the expression of these genes in the Kl variant carriers to see if they separated these animals by age as well.
Gene expression of these genes do a good job separating all genotypes by age. We show this in two ways. First by the decomposition of the aging gene expression matrix for the group. And second by weighting the individuals based on the loadings from the WT animals to give each animal an aging score. The bars are colored by age group.
kl.carrier.idx <- which(covar.table[,"genotype"] != "WT/WT")
groups <- unique(covar.table[kl.carrier.idx, c("age_batch", "genotype")])
u_geno <- unique(groups[,"genotype"])
cluster_sep <- rep(NA, length(u_geno))
names(cluster_sep) <- u_geno
par(mfrow = c(2,2))
for(g in 1:length(u_geno)){
geno.expr.idx <- which(covar.table[,"genotype"] == u_geno[g])
geno.expr <- adj_expr[geno.expr.idx, gene.id]
colnames(geno.expr) <- diff.gene[,"external_gene_name"]
g.age.df <- data.frame(covar.table[geno.expr.idx,"age_batch"])
colnames(g.age.df) <- "age"
rownames(g.age.df) <- rownames(covar.table)[geno.expr.idx]
#pheatmap(geno.expr, annotation_row = g.age.df)
age.group <- as.numeric(as.factor(covar.table[geno.expr.idx,"age_batch"]))
geno.decomp <- plot.decomp(geno.expr, cols = age.group,
main = u_geno[g])
ind.score <- (geno.expr)%*%wt.diff.decomp$u[,1,drop=FALSE]
ind.order <- order(ind.score[,1])
barplot(ind.score[ind.order,1], col = age.group[ind.order], names = NA,
ylab = "Aging Score", main = u_geno[g])
group.age <- unique(covar.table[geno.expr.idx,"age_batch"])
legend("topleft", legend = group.age, col = c(1:length(group.age)), pch = 16)
cl.test <- silhouette_coef(geno.decomp$u, membership = age.group)
cluster_sep[g] <- mean(cl.test, na.rm = TRUE)
#legend("topright", legend = unique(covar.table[wt.id,"age_batch"]), col = c(1,2), pch = 16)
}
#barplot(cluster_sep, beside = TRUE, col = geno.cols[names(cluster_sep)], main = "Cluster separation")
The following plot shows the decomposition of the aging genes for all animals colored by genotype. There doesn’t appear to be any difference based on genotype. In fact the WT/WT animals occupy the extreme values for PC1.
Can we conclude that overall, transcriptional aging is not affected by Kl variant?
par(mfrow = c(1,2))
test <- adj_expr[,gene.id]
age.decomp <- plot.decomp(test, cols = as.numeric(as.factor(covar.table[,"age_batch"])),
main = "Expression PCs Colored by Age")
geno.col <- rep("black", nrow(covar.table))
for(g in 1:length(geno.cols)){
geno.col[which(covar.table[,"genotype"] == names(geno.cols)[g])] <- geno.cols[g]
}
plot(age.decomp$u, col = geno.col, pch = 16, xlab = "PC1", ylab = "PC2",
main = "Expression PCs Colored by Genotype")
The aging scores across all animals are shown below.
test.score <- test%*%wt.diff.decomp$u[,1,drop=FALSE]
age.label <- as.numeric(as.factor(covar.table[,"age_batch"]))
score.order <- order(test.score)
par(mfrow = c(2,1), mar = c(0,4,2,2))
barplot(test.score[score.order,1], col = age.label[score.order], names = NA,
ylab = "Aging Score", main = "Colored by age")
par(mar = c(4,4,0,2))
barplot(test.score[score.order,1], col = geno.col[score.order], names = NA,
ylab = "Aging Score")
mtext("Colored by genotype", side = 3, line = -2.5, font = 2, cex = 1.2)
We can also look at age-associated genes by testing the interaction between genotype and age. To do this, we used VS/FC carrier status as genotype. So now, instead of looking for age-related genes in the WT animals, we are looking for genes that have main age effects and age-by-genotype interaction effects in the VS and FC carriers (WT mice are excluded here).
The following qq plots show p value distributions for the different pieces of the model.
test.idx <- setdiff(kl.carrier.idx, which(covar.table[,"age_batch"] == 24))
dummy.geno <- rep(0, length(test.idx))
dummy.geno[grep("VS", covar.table[test.idx,"genotype"])] <- 1
dummy.age <- as.numeric(as.factor(covar.table[test.idx,"age_batch"]))
test <- apply(adj_expr[test.idx,], 2, function(x) lm(x~dummy.geno*dummy.age))
test.coef <- t(sapply(test, coef))
test.p <- t(sapply(test, function(x) summary(x)$coefficients[,"Pr(>|t|)"]))
par(mfrow = c(2,2))
for(i in 2:4){
qqunif.plot(test.p[,i], plot.label = colnames(test.p)[i])
}
The following plot shows how many genes had significant effects from each of the terms. There were more genes that had significant age effects in this group. There were some that had an interaction between age and genotype.
int.p <- test.p[,4]
adj.int.p <- p.adjust(int.p, "fdr")
sig.int <- which(adj.int.p <= fdr.thresh)
sig.any <- apply(test.p, 2, function(x) which(p.adjust(x, "fdr") <= fdr.thresh))
num.sig <- sapply(sig.any, length)
barplot_with_num(num.sig[2:4])
The interaction plots for these genes are shown below.
int.labels <- paste(rep(c("4mo", "12mo"), 2), rep(c("FC", "VS"), each = 2))
groups <- unique(covar.table[test.idx,c("age_batch", "genotype")])
par(mfrow = c(2,3))
for(i in 1:length(sig.any[[4]])){
gene.id <- names(sig.any[[4]])[i]
gene.name <- mouse.gene.table[which(mouse.gene.table[,"ensembl_gene_id"] == gene.id), "external_gene_name"]
a <- boxplot(adj_expr[test.idx,names(sig.any[[4]])[i]]~dummy.age*dummy.geno, main = gene.name,
names = int.labels, xlab = "", ylab = "Expression")
#interaction.plot(dummy.age, dummy.geno, adj_expr[test.idx,names(sig.any[[4]])[i]], main = gene.name)
}
#cat(names(sig.any[[4]]), sep = "\n")
#mouse.gene.table[match(names(sig.any[[4]]), mouse.gene.table[,"ensembl_gene_id"]), "external_gene_name"]
We also looked at the genes that had a main effect of age in this group. The enrichments shown below are very similar to the age-related genes in the WT animals.
#play the same separation game as above, but with the new list
gene.id <- names(sig.any[[3]])
gene.idx <- match(gene.id, mouse.gene.table[,"ensembl_gene_id"])
diff.gene <- cbind(mouse.gene.table[gene.idx,], signif(test.coef[gene.id,"dummy.age"],2),
signif(test.p[gene.id,"dummy.age"], 2), signif(p.adjust(test.p[gene.id,"dummy.age"], "fdr"), 2))
colnames(diff.gene)[c(7:9)] <- c("effect", "p", "p_adjusted")
datatable(diff.gene)
up_idx <- which(diff.gene[,"effect"] > 0)
down_idx <- which(diff.gene[,"effect"] < 0)
up_enrich <- gost(diff.gene[up_idx, "ensembl_gene_id"], organism = "mmusculus",
sources = c("GO", "KEGG", "REACTOME", "HP", "CORUM"))
down_enrich <- gost(diff.gene[down_idx, "ensembl_gene_id"], organism = "mmusculus",
sources = c("GO", "KEGG", "REACTOME", "HP", "CORUM"))
all_enrich <- gost(diff.gene[, "ensembl_gene_id"], organism = "mmusculus",
sources = c("GO", "KEGG", "REACTOME", "HP", "CORUM"))
plot.enrichment(up_enrich, plot.label = "Up-regulated with age")
plot.enrichment(down_enrich, plot.label = "Down-regulated with age")
plot.enrichment(all_enrich, plot.label = "Different with age")
These genes do a similar job of separating the ages overall.
diff.expr <- adj_expr[test.idx,gene.id]
age.df <- data.frame("age" = as.factor(covar.table[test.idx,"age_batch"]))
rownames(age.df) <- rownames(adj_expr)[test.idx]
colnames(diff.expr) <- diff.gene[,"external_gene_name"]
gene.order <- order(pam(t(diff.expr), k = 2)$clustering)
ind.order <- order(pam(diff.expr, k = 2)$clustering)
pheatmap(t(diff.expr)[gene.order,ind.order], show_colnames = FALSE, annotation_col = age.df,
cluster_cols = FALSE, cluster_rows = FALSE)
The separation by age for all animals is shown below. It is similar to when we used the wild type animals.
age.col <- as.numeric(as.factor(covar.table[test.idx,"age_batch"]))
age.decomp <- plot.decomp(diff.expr, cols = age.col)
legend("topright", legend = unique(covar.table[test.idx,"age_batch"]), col = c(1,2), pch = 16)
The aging scores based on the first principal component of the gene expression matrix are shown below. It looks as if we really need both principal components to classify animals, not just the first.
gene.decomp <- plot.decomp(t(diff.expr), plot.results = FALSE)
aging.score <- diff.expr%*%gene.decomp$u[,1]
age.order <- order(aging.score[,1])
barplot(aging.score[age.order,1], col = age.col[age.order], names = NA,
ylab = "Aging Score")
ages <- unique(covar.table[test.idx,"age_batch"])
legend("topleft", legend = ages, col = c(1:length(ages)), pch = 16)